Section 4 AB-effects (metagenomes)
4.1 Load in variants
vcfToDataframe <- function(vcf.files, contig_mapping = contig_mapping, gff.df = gff.df) {
require(vcfR)
res <- list()
for (file in vcf.files) {
library(data.table)
vcf.content <- vcfR::read.vcfR(file, verbose = FALSE)
vcf.fix <- as.data.frame(vcf.content@fix) # contains chr, position and substitution informations
vcf.info <- vcfR::INFO2df(vcf.content) # get INFO field, contains DP, AF informations
if(nrow(vcf.fix) > 0) { # there are variants
dat <- as.data.frame(cbind(vcf.fix[,c(1, 2, 4, 5, 6)], vcf.info[,c(1, 2)]))
dat$majorAF <- sapply(dat$AF, minorAfToMajorAf) # transforms e.g. AF of 0.1 to 0.9, 0.9 stays 0.9 and 0.5 stays 0.5
dat$genome <- contig_mapping[match(dat$CHROM, contig_mapping$contig),]$genome # map chr information to genome name e.g. NHMU01000001.1 -> i48
dat$genome_hr <- translateGenomeIdToFullName(tolower(dat$genome))
dat$mouse.id <- substr(tools::file_path_sans_ext(basename(file)), 1, 4)
dat$mouse.group <- translateMouseIdToTreatmentGroup(dat$mouse.id)
dat$day <- as.integer(substr(basename(file), 6, 7))
dat$phase <- binDaysByPhase(as.numeric(as.matrix(dat$day)))
dat$phase_num <- binDaysByPhaseGroup(dat$day)
dat$dp <- as.numeric(as.matrix(vcf.info$DP))
# annotate overlay of gene
dt.gff <- data.table(start = gff.df$start, end = gff.df$end,
chr = as.character(as.matrix(gff.df$chr)), feature = gff.df$product)
colnames(dat)[1:2] <- c("chr", "start")
dat$start <- as.integer(as.matrix(dat$start))
dat$chr <- as.character(as.matrix(dat$chr))
dat$end <- dat$start
dat2 <- as.data.table(dat)
setkey(dt.gff, chr, start, end)
annotated <- foverlaps(dat2, dt.gff, type="within", mult="first")
res[[tools::file_path_sans_ext(basename(file))]] <- annotated # add vcf df to list
} else{
message(paste("Skipping", file))
}
}
df <- as.data.frame(do.call(rbind, res)) # merge list to df
return(df)
}# load in reference information
gff.files <- Sys.glob("data/references/joined_reference_curated_ecoli/*.gff")
gff.df <- NULL
for (gff.file in gff.files){
message(gff.file)
gff <- rtracklayer::readGFF(gff.file)
# subset since different columns are present on gff files
relevant <- data.frame(start = gff$start, end = gff$end, type = as.character(as.matrix(gff$type)), gene = as.character(as.matrix(gff$gene)), product = as.character(as.matrix(gff$product)), chr = as.character(as.matrix(gff$seqid)))
relevant$genome <- substr(basename(gff.file), 1, nchar(basename(gff.file))-4)
gff.df <- rbind(gff.df, relevant)
}## data/references/joined_reference_curated_ecoli/joined_reference_curated_ecoli.gff
# load in contig information
contig_mapping <- read.csv2("data/contig_mapping_new_ref.csv", sep =";", header=T, stringsAsFactors = F) # this file contains contig names of the 12 OligoMM genomes
# load in vcf files
vcf.files <- Sys.glob("out_philipp/all_vcf/*.vcf")
vcf.samples <- suppressWarnings(vcfToDataframe(vcf.files, contig_mapping, gff.df = gff.df))## Skipping out_philipp/all_vcf/1683d09_S49.vcf
vcf.samples$feature <- as.character(as.matrix(vcf.samples$feature))
vcf.samples[which(is.na(vcf.samples$feature)),]$feature <- "outside ORFs"
vcf.samples $start <- NULL
vcf.samples $end <- NULL
vcf.samples $i.end <- NULL
colnames(vcf.samples)[3] <- "POS"
vcf.samples$ref_size <- nchar(as.character(as.matrix(vcf.samples$REF)))
vcf.samples$alt_size <- nchar(as.character(as.matrix(vcf.samples$ALT)))
vcf.samples$alteration <- paste(as.character(vcf.samples$REF), "->",as.character(vcf.samples$ALT))
vcf.samples$alteration_type <- "SNP"
vcf.samples[which(vcf.samples$ref_size < vcf.samples$alt_size), ]$alteration_type <- "insertion"
vcf.samples[which(vcf.samples$ref_size > vcf.samples$alt_size), ]$alteration_type <- "deletion"
saveRDS(vcf.samples, file = "data/rds/omm_ab.rds")4.2 add presence in reseq experiment
load in variants from resequencing run and mark if the variants form this study overlap
dat <- readRDS("data/rds/omm_ab.rds")
dat_re <- readRDS("data/rds/reseq.rds")
dat_re$variant.id <- paste(dat_re$chr, dat_re$POS, dat_re$REF, dat_re$ALT, sep = "-")
# consiger there if AF is > .5
dat_re <- dat_re[which(dat_re$AF > .5),]Save as table
dat <- readRDS("data/rds/omm_ab.rds")
write.table(vcf_samples, file = "results/tables/omm_antibiotic_variants_long.csv", sep = ";",
col.names = T, row.names = F, quote = F)
dat$variant.id <- paste(dat$chr, dat$POS, dat$REF, dat$ALT, sep = "-")
dat$fixed <- FALSE
dat[which(!is.na(match(dat$variant.id, dat_re$variant.id))),]$fixed <- TRUE
saveRDS(dat, file = "data/rds/omm_ab_with_fixed.rds")4.3 AF frequency
p <- ggplot(vcf.samples, aes(AF, fill = genome)) + geom_histogram()
p <- p + facet_grid(mouse.id + mouse.group ~ genome + genome_hr)
p <- p + theme_classic() + xlab("AF") + ylab("occurence")
print(p)## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Figure 4.1: AF of resequenced strains
p <- ggplot(vcf.samples, aes(majorAF, fill = genome)) + geom_histogram()
p <- p + facet_grid(mouse.id + mouse.group ~ genome + genome_hr)
p <- p + theme_classic() + xlab("AF") + ylab("occurence")
print(p)## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Figure 4.2: major AF of resequenced strains
4.4 Average coverage of variants by genome
dat <- readRDS("data/rds/omm_ab.rds")
dat_subset <- dat[which(dat$mouse.group == 'Water'),]
p <- ggplot(dat_subset, aes(x = reorder(genome, -DP), y = DP , color = genome)) + geom_boxplot()
p <- p + theme_classic() + xlab("AF") + ylab("depth of variant")
p <- p + scale_y_log10()
print(p)
4.5 number of variants per samples
dat <- readRDS("data/rds/omm_ab.rds")
dat$dummy <- 1
dat.agg <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
DT::datatable(dat.agg)4.5.1 number of variants per treatment group
p <- ggplot(dat.agg, aes(x = mouse.id, y = dummy, color = day))
p <- p + geom_jitter(shape = 4) + facet_grid(. ~ mouse.group, scales = "free_x")
p <- p + geom_boxplot() + theme_classic() + xlab("Mouse ID") + ylab("number of variants")
plotly::ggplotly(p)Figure 4.3: number of variants of all 12 OMM genomes by mouse
4.6 Heatmap
All mice
dat <- readRDS("data/rds/omm_ab.rds")
dat$sample.id <- paste0(dat$mouse.id, "-",dat$day)
dat$variant.id <- paste0(dat$POS, "-", dat$REF, "-", dat$ALT)
data.wide <- dcast(dat, variant.id ~ sample.id, value.var = "AF")## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL
library(circlize)
library(ComplexHeatmap)
heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]
# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 0.5)) ,]
dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )
occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]
col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))
qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]
qpcr <- apply(qpcr, 1, function(x) x/sum(x))
qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]
#pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr),
bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
mouse = heat3.mouse.id,
group = heat3.mouse.group,
phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =F,
column_order = order(ord$occ, ord$mouse.id, ord$day),
right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))),
row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
column_split = heat3.mouse.group,
column_names_gp = gpar(fontsize =5),
row_names_gp = gpar(fontsize = 3),
show_row_dend = F,
show_row_names = F,
show_column_dend = F
)
4.7 with information if SNP was observed in resequencing
all high-variant
dat <- readRDS("data/rds/omm_ab_with_fixed.rds")
dat$sample.id <- paste0(dat$mouse.id, "-",dat$day)
dat$variant.id <- paste0(dat$genome_hr,"-", dat$fixed, "-", dat$POS, "-", dat$REF, "-", dat$ALT)
data.wide <- dcast(dat, variant.id ~ sample.id, value.var = "AF")## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL
library(circlize)
library(ComplexHeatmap)
heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]
# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 0.5)) ,]
dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )
occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]
col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))
qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]
qpcr <- apply(qpcr, 1, function(x) x/sum(x))
qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]
bug <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 1)
fixed <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 2)
pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr),
bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
mouse = heat3.mouse.id,
group = heat3.mouse.group,
phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =F,
column_order = order(ord$occ, ord$mouse.id, ord$day),
right_annotation = rowAnnotation(fixed = fixed, bug = bug, prev = anno_barplot(rowSums(heat3)), col = bugcolors),
row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
column_split = heat3.mouse.group,
column_names_gp = gpar(fontsize =5),
row_names_gp = gpar(fontsize = 3),
show_row_dend = F,
show_row_names = F,
show_column_dend = F
)
dev.off()## pdf
## 2
All that are fixed
dat <- readRDS("data/rds/omm_ab_with_fixed.rds")
dat <- dat[which(dat$fixed == TRUE),]
dat$sample.id <- paste0(dat$mouse.id, "-",dat$day)
dat$variant.id <- paste0(dat$genome_hr,"-", dat$fixed, "-", dat$POS, "-", dat$REF, "-", dat$ALT)
data.wide <- dcast(dat, variant.id ~ sample.id, value.var = "AF")## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL
library(circlize)
library(ComplexHeatmap)
heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]
# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 0.5)) ,]
heat3 <- heat
dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )
occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]
col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))
qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]
qpcr <- apply(qpcr, 1, function(x) x/sum(x))
qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]
bug <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 1)
fixed <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 2)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
column_order = order(ord$occ, ord$mouse.id, ord$day),cluster_columns =F,
right_annotation = rowAnnotation(fixed = fixed, bug = bug, prev = anno_barplot(rowSums(heat3)), col = bugcolors),
row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr),
bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
mouse = heat3.mouse.id,
group = heat3.mouse.group,
phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
column_names_gp = gpar(fontsize =5),
row_names_gp = gpar(fontsize = 3),
show_row_dend = F,
show_row_names = F,
show_column_dend = F
)
All mice clustered
dat <- readRDS("data/rds/omm_ab.rds")
dat$sample.id <- paste0(dat$mouse.id, "-",dat$day)
dat$variant.id <- paste0(dat$POS, "-", dat$REF, "-", dat$ALT)
data.wide <- dcast(dat, variant.id ~ sample.id, value.var = "AF")## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL
library(circlize)
library(ComplexHeatmap)
heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]
# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 0.5)) ,]
dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )
occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]
col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))
qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]
qpcr <- apply(qpcr, 1, function(x) x/sum(x))
qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]
#pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr),
bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
mouse = heat3.mouse.id,
group = heat3.mouse.group,
phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =T,
right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))),
row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
column_names_gp = gpar(fontsize =5),
row_names_gp = gpar(fontsize = 3),
show_row_dend = F,
show_row_names = F,
show_column_dend = T
)
dat <- readRDS("data/rds/omm_ab.rds")
dat$sample.id <- paste0(dat$mouse.id, "-",dat$day)
dat$variant.id <- paste0(dat$POS, "-", dat$REF, "-", dat$ALT)
data.wide <- dcast(dat, variant.id ~ sample.id, value.var = "AF")## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL
library(circlize)
library(ComplexHeatmap)
heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]
# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 0.5)) ,]
dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )
occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]
col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))
qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]
qpcr <- t(qpcr)
qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]
#pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr),
bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
mouse = heat3.mouse.id,
group = heat3.mouse.group,
phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =T,
right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))),
row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
column_names_gp = gpar(fontsize =5),
row_names_gp = gpar(fontsize = 3),
show_row_dend = F,
show_row_names = F,
show_column_dend = T
)
4.8 Focus on mouse where we have many time points
dat <- readRDS("data/rds/omm_ab.rds")
dat$rep.group <- translateMouseIdToReplicateGroup(dat$mouse.id)
dat <- dat[which(dat$rep.group == "Full"),]
dat$sample.id <- paste0(dat$mouse.id, "-",dat$day)
dat$variant.id <- paste0(dat$POS, "-", dat$REF, "-", dat$ALT)
data.wide <- dcast(dat, variant.id ~ sample.id, value.var = "AF")## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL
heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]
# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 0.50)) ,]
dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))
# order the heatmap by treatment group
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE),
day=anno_simple(heat3.day, pch =heat3.phase2 )),
cluster_columns =F,
column_order = order(ord$mouse.id, ord$day),
column_split = heat3.mouse.group,
column_names_gp = gpar(fontsize =5),
row_names_gp = gpar(fontsize = 8),
show_row_dend = F,
show_row_names = F,
show_column_dend = F
)
4.9 Akkermansia Muciniphila
4.9.1 area plot 1
dat <- readRDS("data/rds/omm_ab.rds")
dat$variant.id <- paste0(dat$POS, "-",dat$REF, "-", dat$ALT)
dat <- dat[which(dat$chr == "Akkermansia_muciniphila"),]
data.wide <- dcast(dat, day + mouse.id + mouse.group ~variant.id, value.var = "AF")## Warning in dcast(dat, day + mouse.id + mouse.group ~ variant.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast;
## please note that reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an
## error.
data.wide[is.na(data.wide)] <- 0
colMax <- function(X) apply(X, 2, max)
dat_mat <- data.wide[,-c(1:3)]
# filter variants
data.wide.reduced <- cbind(data.wide[,c(1:3)],dat_mat[,which(colMax(dat_mat)> 0.5)])
#data.wide.reduced <- data.wide
dat2 <- melt(data.wide.reduced, id.vars = c("day","mouse.id", "mouse.group"))## Warning in melt(data.wide.reduced, id.vars = c("day", "mouse.id", "mouse.group")): The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2
## method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can
## prepend the namespace like reshape2::melt(data.wide.reduced). In the next version, this warning will become an error.
dat3 <- dat2 %>% group_by(day, mouse.id) %>% mutate(Nor = value/sum(value))
set.seed(123)
col_list <- sort(unique(dat3$variable))
cols <-randomcoloR::randomColor(length(unique(dat3$variable)))
# Muller plot
p <- ggplot(dat3, aes(x = day, y = Nor, group = variable, fill=variable, label = ))
p <- p + geom_area(color = "black", size = 0.1)
p <- p + facet_wrap(~ mouse.group + mouse.id, ncol=3)
p <- p + theme_minimal() + theme(legend.position = "none")
p <- p + ylab("Fraction")
p <- p + scale_fill_manual(values= cols, breaks = col_list)
p <- p + geom_vline(xintercept = c(4, 18, 53, 67))
plotly::ggplotly(p)4.9.2 line plot
dat <- readRDS("data/rds/omm_ab.rds")
dat$variant.id <- paste0(dat$POS, "-",dat$REF, "-", dat$ALT)
dat <- dat[which(dat$chr == "Akkermansia_muciniphila"),]
data.wide <- dcast(dat, day + mouse.id + mouse.group~variant.id, value.var = "AF")## Warning in dcast(dat, day + mouse.id + mouse.group ~ variant.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast;
## please note that reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an
## error.
data.wide[is.na(data.wide)] <- 0
dat2 <- melt(data.wide, id.vars = c("day","mouse.id", "mouse.group"))## Warning in melt(data.wide, id.vars = c("day", "mouse.id", "mouse.group")): The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method;
## please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend
## the namespace like reshape2::melt(data.wide). In the next version, this warning will become an error.
set.seed(123)
col_list <- sort(unique(dat3$variable))
cols <-randomcoloR::randomColor(length(unique(dat3$variable)))
p <- ggplot(dat2, aes(x = day, y =value))
p <- p + geom_line(aes(group = variable), alpha= 0.2)
p <- p + theme_minimal()
p <- p + facet_wrap(~mouse.group + mouse.id, ncol=3)
p <- p + geom_vline(xintercept = c(4, 18, 53, 67))
plotly::ggplotly(p)